Tech Arsenal 1

home *** CD-ROM | disk | FTP | other *** search

/ Tech Arsenal 1 / Tech Arsenal (Arsenal Computer).ISO / tek-02 / tptc16.zip / TPCSCAN.INC < prev next >

Wrap

Text File | 1993-01-04 | 16KB | 665 lines

(* * TPTC - Turbo Pascal to C translator * * (C) 1988 Samuel H. Smith (rev. 13-Feb-88) * *) (********************************************************************) (* * lexical scanner * *) function coctal(n: integer): anystring; {convert an integer into a c style octal character literal} function odigit(n: integer): char; (* convert an integer into an octal digit *) begin odigit := chr( (n and 7) + ord('0') ); end; begin coctal := '''\' + odigit(n shr 6) + odigit(n shr 3) + odigit(n) + ''''; toktype := strng; end; (********************************************************************) procedure getchar; {consume the current char and get the next one} begin if read_include then begin if eof(inclfd) then begin close(inclfd); read_include := false; writeln(ofd[level]); if includeinclude then writeln(ofd[level],'/* end of ',incl_name,' */') else begin discard_nested; write(con,^M^J,LJUST(' ',level*2+15),srcfiles[level],^M); end; end else read(inclfd, nextc); end; if not read_include then begin if eof(infd) then endfile; read(infd, nextc); end; if nextc = ^J then begin inc(srclines[level]); if (srclines[level] mod 6) = 0 then write(con,srcfiles[level],'(',srclines[level],') '^M); abortcheck; end; end; (********************************************************************) function usec: char; {use up the current character(return it) and get the next one from the input stream} var c: char; begin c := nextc; getchar; usec := c; end; (********************************************************************) function newc(n: string40): string40; {replace the current character with a different one and get the next character from the input stream} var c: char; begin c := nextc; getchar; newc := n; end; (********************************************************************) procedure scan_ident; {scan an identifier; output is ltok; nextc is first character following the identifier; toktype = identifier; this is the protocol for all of the scan_xxxx procedures in the lexical analyzer} begin toktype := unknown; ltok := ''; repeat case nextc of 'A'..'Z': begin if map_lower then nextc := chr( ord(nextc)+32 ); ltok := ltok + nextc; getchar; end; 'a'..'z', '0'..'9', '_','@': ltok := ltok + usec; else toktype := identifier; end; until toktype = identifier; end; (********************************************************************) procedure scan_preproc; {scan a tshell preprocessor directive; same syntax as C already} begin write(ofd[level],'#'); repeat write(ofd[level],nextc); getchar; until nextc = ^M; getchar; writeln(ofd[level]); toktype := unknown; end; (********************************************************************) procedure scan_number; {scan a number; this also processes #nnn character literals, which are converted into octal character literals. imbedded periods are processed, and a special condition is noted for trailing periods. this is needed for scanning the ".." keyword when used after numbers. an ungetchar facility would be more general, but isn't needed anywhere else. in pascal/mt+, #nnn is translated into nnnL } var hasdot: boolean; octal: boolean; islong: boolean; begin hasdot := false; islong := false; octal := false; toktype := number; (* check for preprocessor directives, character literals or long literals *) if nextc = '#' then begin ltok := ''; if mt_plus then islong := true else octal := true; end; getchar; (* check for preprocessor directives *) if octal and (nextc in ['a'..'z']) then scan_preproc else repeat case nextc of '0'..'9': ltok := ltok + usec; '.': if hasdot then begin if ltok[length(ltok)] = '.' then begin ltok[0] := pred(ltok[0]); {remove trailing ., part of ..} if octal then ltok := coctal(atoi(ltok)); extradot := true; end; exit; end else begin hasdot := true; ltok := ltok + usec; end; else begin if octal then ltok := coctal(atoi(ltok)) else if islong then ltok := ltok + 'L'; exit; end; end; until true=false; end; (********************************************************************) procedure scan_hat; {scan tokens starting with ^ - returns ^X as a character literal corresponding to the specified control character. returns ^ident as an identifier with the leading ^ intact. also scans ^. and ^[.} var c: char; begin getchar; if (nextc = '.') or (nextc = '[') then ltok := '^' + usec {^. or ^[} else if nextc in ['A'..'Z','a'..'z','@'..'_'] then begin ltok := nextc; scan_ident; if length(ltok) = 1 then {^c = control char} ltok := coctal( ord(upcase(ltok[1])) - ord('@') ) else ltok := '^' + ltok; {^ident = pointer to ident} end; end; (********************************************************************) procedure scan_dot; {scans tokens starting with "."; knows about the 'extra dot' condition that comes up in number scanning. returns a token of either '.' or '..'} begin getchar; if (nextc = '.') or extradot then begin ltok := '..'; extradot := false; end; if nextc = '.' then getchar; end; (********************************************************************) procedure scan_string; {scans a literal string. processes imbedded quotes ala pascal. translates the string into a C string with the proper escapes on imbedded quotes. converts single character strings into character constants. these are sometimes converted back to strings when the parser needs to} begin toktype := unknown; ltok := '"'; getchar; {consume the open quote} repeat if nextc in [^J,^M] then begin syntax('Closing quote expected (scan_string)'); exit; end; if nextc = '''' then begin getchar; {consume the quote} if nextc = '''' then ltok := ltok + usec {double quotes are coded as a single quote} else begin {end of string} ltok := ltok + '"'; toktype := strng; end; end else if nextc = '"' then ltok := ltok + newc('\"') else if nextc = '\' then ltok := ltok + newc('\\') else ltok := ltok + usec; until toktype = strng; if length(ltok) = 3 then begin ltok[1] := ''''; ltok[3] := ''''; end; if ltok = '"\""' then ltok := '''"''' else if (ltok = '"''"') or (ltok = '''''''') then ltok := '''\'''''; end; (********************************************************************) procedure scan_hex; {scans a hex constant and returns it as a C style 0xHHHH literal} begin getchar; {consume the '$'} ltok := '0x'; while nextc in ['0'..'9', 'A'..'F', 'a'..'f'] do ltok := ltok + usec; toktype := number; end; (********************************************************************) procedure scan_pragma; {scans a turbo pascal compiler option and translates it into a general "pragma" ','nd. include directive is translated into the #include ','nd. returns with the first non-blank after the pragma} var code: char; prag: anystring; arg: anystring; begin repeat if nextc = ',' then newline; getchar; {consume the $ or ,} code := upcase(usec); arg := usec; if arg = '+' then arg := 'ON' else if arg = '-' then arg := 'OFF' else begin {decode numeric or string pragma params} if arg = ' ' then arg := ''; while not (nextc in [' ','*','}',',']) do ltok := ltok + usec; arg := arg; end; case code of 'I': if (arg = 'ON') or (arg = 'OFF') then prag := '/* I(' + arg + ')' + ' */' else begin prag := '#include "' + arg + '"' + ^M^J; {$I-} assign(inclfd, arg); reset(inclfd); {$I+} if ioresult = 0 then begin read_include := true; incl_name := arg; if not includeinclude then begin write(ofd[level],prag,' '); enter_nested; srcfiles[level] := incl_name; srclines[level] := 0; write(con,^M^J,'':level*2+15, srcfiles[level],^M); end; end; end; else prag := '/* ' + code + '(' + arg + ')' + ' */'; end; write(ofd[level],prag,' '); while nextc = ' ' do getchar; until nextc <> ','; end; (********************************************************************) procedure scan_curlycomment; {processes a curly-brace enclosed comment} begin getchar; {consume the open comment} if nextc = '$' then begin scan_pragma; if nextc = '}' then begin getchar; exit; end; end; write(ofd[level],' /* '); while nextc <> '}' do begin write(ofd[level],nextc); getchar; end; writeln(ofd[level],' */ '); getchar; {consume the close comment} end; (********************************************************************) procedure scan_parencomment; {process a (* enclosed comment} begin getchar; {consume the *} if nextc = '$' then scan_pragma; write(ofd[level],'/*'); repeat write(ofd[level],nextc); if nextc = '*' then begin getchar; if nextc = ')' then begin writeln(ofd[level],'/ '); getchar; exit; end; end else getchar; until true=false; end; (********************************************************************) procedure scan_blanks; {scan white space. this procedure sometimes passes whitespace to the output. it keeps track of the indentation of the current line so it can be used by newline} var linestart: boolean; indent: anystring; valid: boolean; begin linestart := false; indent := ''; valid := false; repeat case nextc of ^J,^M: begin if nospace=false then write(ofd[level],nextc); indent := ''; linestart := true; getchar; end; ' ',^I,^@,^L: indent := indent + usec; '#': if linestart then begin write(ofd[level],indent); {pass preprocessor directives} indent := ''; {without change (single-line only)} repeat write(ofd[level],nextc); getchar; until nextc = ^M; getchar; writeln(ofd[level]); end else valid := true; else valid := true; end; until valid; if linestart then begin spaces := indent; if nospace=false then write(ofd[level],spaces); end; end; (********************************************************************) procedure scan_tok; {scans the next lexical token; returns the token in ltok and toktype} begin scan_blanks; toktype := unknown; ltok := nextc; case nextc of 'a'..'z', '_', 'A'..'Z': scan_ident; '''': scan_string; '0'..'9': scan_number; '#': begin scan_number; if toktype = unknown then scan_tok; {in case of #directive} end; '$': scan_hex; '<': begin getchar; if (nextc = '>') or (nextc = '=') then ltok := '<' + usec; end; '>': begin getchar; if nextc = '=' then ltok := '>' + usec; end; ':': begin getchar; if nextc = '=' then ltok := ':' + usec; end; '^': scan_hat; '.': scan_dot; '{': begin scan_curlycomment; scan_tok; end; '(': begin getchar; if nextc = '*' then begin scan_parencomment; scan_tok; end; end; else getchar; {consume the unknown char} end; end; (********************************************************************) procedure gettok; {get the next input token; this is the top level of the lexical analyzer. it returns ltok, tok(ltok in upper case), toktype. it translates BEGIN and END into braces; it checks for statement and section keywords} var i: integer; begin scan_tok; tok := ltok; if toktype = identifier then begin stoupper(tok); if tok = 'BEGIN' then begin tok := '{'; ltok := tok; toktype := keyword; exit; end; if tok = 'END' then begin tok := '}'; ltok := tok; toktype := keyword; exit; end; (* check for statement keywords *) for i := 1 to nkeywords do if tok = keywords[i] then begin toktype := keyword; exit; end; end; end; (********************************************************************) function usetok: string80; {return (use) and consume current token} var tv: string80; begin tv := ltok; gettok; usetok := tv; end;